************************************************
***Title: weather_association_distribution.do
***Creators: Joelle Abramowitz, Shooshan Danagoulian, and Owen Fleming*
***Notes: This file regresses pollen on the weather variables (taken from the global macro $weather, which must be defined before this file is run) separately for each county and plots the distribution of the resulting R-squared values. It exports two figures: a histogram pooling all counties, and a combined figure with one histogram per Census region. 

*For questions, contact
*Owen Fleming
*hg3490@wayne.edu
************************************************


**********SETUP
use data/for_analysis, clear

* The regressors come from the global macro $weather, which this file does not
* define. With an empty list every regression would be intercept-only and
* report an R-squared of zero, so stop rather than plot a meaningless figure.
if `"$weather"' == "" {
	display as error "global macro weather is empty; define the weather regressors before running this file"
	exit 198
}

* Collect one (county, r2) row per county in a temporary dataset. Posting the
* results directly avoids building one matrix per county and avoids writing a
* permanent intermediate file; the tempfile is removed automatically.
tempname r2_handle
tempfile r2_by_county
postfile `r2_handle' double(county r2) using `r2_by_county'

* The condition county == `c' below requires county to be numeric.
levelsof county, local(county_levels)
foreach c of local county_levels {
	* Regress pollen on weather within this county only.
	capture quietly regress pollen $weather if county == `c'
	local rc = _rc
	if `rc' == 0 {
		post `r2_handle' (`c') (e(r2))
	}
	else if !inlist(`rc', 2000, 2001) {
		* r(2000)/r(2001) mean no or insufficient observations: such a county
		* simply gets no R-squared and is dropped after the merge. Any other
		* error (e.g. a misspelled regressor) is a real problem, so re-raise it.
		postclose `r2_handle'
		display as error "regression of pollen on weather failed for county `c'"
		exit `rc'
	}
}
postclose `r2_handle'

* Attach each county's R-squared to the analysis data and keep only the
* counties for which an R-squared could be computed.
merge m:1 county using `r2_by_county'
drop _merge
drop if missing(r2)

* Attach the Census region of each state. State names are converted to proper
* case first so that they can be matched on statename in the lookup file.
replace statename = strproper(statename)
merge m:1 statename using data/other/census_regions

* Discard lookup rows that matched nothing in the analysis data.
keep if _merge != 2
drop _merge

* The District of Columbia is assigned to the South by hand.
replace regionname = "South" if statename == "District Of Columbia"

* Reduce the data to distinct (county, r2, regionname) rows for plotting.
keep regionname r2 county
duplicates drop


**********PRODUCE VISUALIZATIONS
* Draw one histogram of county-level R-squared per Census region and save each
* as results/<region>.gph. The filter uses the full variable name regionname,
* so it does not depend on variable abbreviation being enabled.
foreach region_label in Midwest South Northeast West {
	histogram r2 if regionname == "`region_label'", start(0) width(0.025) ///
		title("`region_label'") xtitle("R-squared") bcolor(navy) fraction ///
		graphregion(color(white))
	graph save results/`region_label', replace
}


**********EXPORT
* Figure 1: histogram of R-squared pooling every county, exported as PNG.
histogram r2, ///
	fraction start(0) width(0.025) ///
	bcolor(navy) graphregion(color(white)) ///
	xtitle("R-squared") ///
	title("Distribution of R-squared from Regression of" "Pollen on Weather: All Regions")
graph export results/weather_association_distribution_all.png, replace

* Figure 2: the four saved regional histograms in one panel with shared axes,
* exported as PNG.
graph combine ///
	results/Midwest.gph ///
	results/South.gph ///
	results/Northeast.gph ///
	results/West.gph, ///
	xcommon ycommon ///
	graphregion(color(white)) ///
	title("Distribution of R-squared from Regression of Pollen on Weather")
graph export results/weather_association_distribution_by_region.png, replace




